version 1.0
# NOTE: When changing this file you must update _all_ tests that use the shared file

# Runs GATK PreprocessIntervals to pad and bin an (optional) interval list.
#
# Inputs:
#   intervals           optional interval list; when absent no -L argument is passed
#                       and the output is named "wgs.preprocessed.interval_list"
#   ref_fasta*          reference FASTA, its index and its sequence dictionary
#   padding             passed as --padding (250 when unset)
#   bin_length          passed as --bin-length (1000 when unset)
#   gatk4_jar_override  optional jar exported as GATK_LOCAL_JAR (default /root/gatk.jar)
# Output:
#   preprocessed_intervals  "<input basename>.preprocessed.interval_list"
task PreprocessIntervals {
  input {
    File? intervals
    File ref_fasta
    File ref_fasta_fai
    File ref_fasta_dict
    Int? padding
    Int? bin_length
    File? gatk4_jar_override

    # Runtime parameters
    String gatk_docker
    Int? mem_gb
    Int? disk_space_gb
    Boolean use_ssd = false
    Int? cpu
    Int? preemptible_attempts
  }

  # Memory: 2 GB unless overridden (1 GB is counted as 1000 MB); the JVM heap is
  # kept 500 MB below the machine allocation.
  Int vm_memory_mb = select_first([mem_gb, 2]) * 1000
  Int java_heap_mb = vm_memory_mb - 500

  # Disk: 40 GB unless overridden; HDD unless use_ssd is set.
  Int disk_gb = select_first([disk_space_gb, 40])
  String disk_type = if use_ssd then "SSD" else "HDD"

  # Output name: the input interval list with its ".interval_list" suffix stripped,
  # or "wgs" when no interval list was supplied.
  String intervals_label = select_first([intervals, "wgs"])
  String output_prefix = basename(intervals_label, ".interval_list")
  String output_filename = "~{output_prefix}.preprocessed.interval_list"

  command <<<
    set -e
    export GATK_LOCAL_JAR=~{default="/root/gatk.jar" gatk4_jar_override}

    gatk --java-options "-Xmx~{java_heap_mb}m" PreprocessIntervals \
      ~{"-L " + intervals} \
      --sequence-dictionary ~{ref_fasta_dict} \
      --reference ~{ref_fasta} \
      --padding ~{default="250" padding} \
      --bin-length ~{default="1000" bin_length} \
      --interval-merging-rule OVERLAPPING_ONLY \
      --output ~{output_filename}
  >>>

  runtime {
    docker: gatk_docker
    memory: "~{vm_memory_mb} MB"
    disks: "local-disk ~{disk_gb} ~{disk_type}"
    cpu: select_first([cpu, 1])
    preemptible: select_first([preemptible_attempts, 5])
  }

  output {
    File preprocessed_intervals = output_filename
  }
}

# Runs GATK AnnotateIntervals over an interval list and writes the result to
# "annotated_intervals.tsv".
#
# Inputs:
#   intervals           interval list passed as -L
#   ref_fasta           reference passed as --reference
#   ref_fasta_fai/dict  not referenced by the command; presumably required so that
#                       they are localized next to the FASTA (confirm with callers)
#   gatk4_jar_override  optional jar exported as GATK_LOCAL_JAR (default /root/gatk.jar)
# Output:
#   annotated_intervals  the TSV written by the tool
task AnnotateIntervals {
  input {
    File intervals
    File ref_fasta
    File ref_fasta_fai
    File ref_fasta_dict
    File? gatk4_jar_override

    # Runtime parameters
    String gatk_docker
    Int? mem_gb
    Int? disk_space_gb
    Boolean use_ssd = false
    Int? cpu
    Int? preemptible_attempts
  }

  # Memory: 2 GB unless overridden (1 GB is counted as 1000 MB); the JVM heap is
  # kept 500 MB below the machine allocation.
  Int vm_memory_mb = select_first([mem_gb, 2]) * 1000
  Int java_heap_mb = vm_memory_mb - 500

  # Disk: reference size rounded up to whole GB plus 50 GB, unless overridden.
  Int disk_gb = select_first([disk_space_gb, ceil(size(ref_fasta, "GB")) + 50])
  String disk_type = if use_ssd then "SSD" else "HDD"

  String annotated_filename = "annotated_intervals.tsv"

  command <<<
    set -e
    export GATK_LOCAL_JAR=~{default="/root/gatk.jar" gatk4_jar_override}

    gatk --java-options "-Xmx~{java_heap_mb}m" AnnotateIntervals \
      -L ~{intervals} \
      --reference ~{ref_fasta} \
      --interval-merging-rule OVERLAPPING_ONLY \
      --output ~{annotated_filename}
  >>>

  runtime {
    docker: gatk_docker
    memory: "~{vm_memory_mb} MB"
    disks: "local-disk ~{disk_gb} ~{disk_type}"
    cpu: select_first([cpu, 1])
    preemptible: select_first([preemptible_attempts, 5])
  }

  output {
    File annotated_intervals = annotated_filename
  }
}

# Collects per-interval fragment counts from a BAM with GATK CollectFragmentCounts.
#
# Inputs:
#   intervals           interval list passed as -L
#   bam, bam_idx        input alignments and their index
#   ref_fasta*          reference FASTA, its index and its sequence dictionary
#   format              value passed as --format; "HDF5" when unset
#   gatk4_jar_override  optional jar exported as GATK_LOCAL_JAR (default /root/gatk.jar)
# Outputs:
#   entity_id  BAM basename with ".bam" removed
#   counts     "<entity_id>.counts.hdf5" for HDF5 output, "<entity_id>.counts.tsv" otherwise
task CollectCounts {
  input {
    File intervals
    File bam
    File bam_idx
    File ref_fasta
    File ref_fasta_fai
    File ref_fasta_dict
    String? format
    File? gatk4_jar_override

    # Runtime parameters
    String gatk_docker
    Int? mem_gb
    Int? disk_space_gb
    Boolean use_ssd = false
    Int? cpu
    Int? preemptible_attempts
  }

  # Memory: 7 GB unless overridden (1 GB is counted as 1000 MB); the JVM heap is
  # kept 1000 MB below the machine allocation.
  Int machine_mem_mb = select_first([mem_gb, 7]) * 1000
  Int command_mem_mb = machine_mem_mb - 1000

  # Sample name is derived from the bam filename
  String base_filename = basename(bam, ".bam")

  # Resolve the effective format once so that the --format argument and the output
  # extension cannot disagree. Previously the extension depended only on whether
  # `format` was supplied, so an explicit format = "HDF5" produced a ".tsv" filename.
  String format_ = select_first([format, "HDF5"])
  String counts_filename = if format_ == "HDF5" then "~{base_filename}.counts.hdf5" else "~{base_filename}.counts.tsv"

  command <<<
    set -e
    export GATK_LOCAL_JAR=~{default="/root/gatk.jar" gatk4_jar_override}

    gatk --java-options "-Xmx~{command_mem_mb}m" CollectFragmentCounts \
      -L ~{intervals} \
      --input ~{bam} \
      --reference ~{ref_fasta} \
      --format ~{format_} \
      --interval-merging-rule OVERLAPPING_ONLY \
      --output ~{counts_filename}
  >>>

  runtime {
    docker: "~{gatk_docker}"
    memory: machine_mem_mb + " MB"
    # Disk: BAM size rounded up to whole GB plus 50 GB, unless overridden.
    disks: "local-disk " + select_first([disk_space_gb, ceil(size(bam, "GB")) + 50]) + if use_ssd then " SSD" else " HDD"
    cpu: select_first([cpu, 1])
    preemptible: select_first([preemptible_attempts, 5])
  }

  output {
    String entity_id = base_filename
    File counts = counts_filename
  }
}

# Collects allelic counts from a BAM at a set of sites with GATK CollectAllelicCounts.
#
# Inputs:
#   common_sites          sites passed as -L
#   bam, bam_idx          input alignments and their index
#   ref_fasta*            reference FASTA, its index and its sequence dictionary
#   minimum_base_quality  passed as --minimum-base-quality (20 when unset)
#   gatk4_jar_override    optional jar exported as GATK_LOCAL_JAR (default /root/gatk.jar)
# Outputs:
#   entity_id       BAM basename with ".bam" removed
#   allelic_counts  "<entity_id>.allelicCounts.tsv"
task CollectAllelicCounts {
  input {
    File common_sites
    File bam
    File bam_idx
    File ref_fasta
    File ref_fasta_fai
    File ref_fasta_dict
    Int? minimum_base_quality
    File? gatk4_jar_override

    # Runtime parameters
    String gatk_docker
    Int? mem_gb
    Int? disk_space_gb
    Boolean use_ssd = false
    Int? cpu
    Int? preemptible_attempts
  }

  # Memory: 13 GB unless overridden (1 GB is counted as 1000 MB); the JVM heap is
  # kept 1000 MB below the machine allocation.
  Int vm_memory_mb = select_first([mem_gb, 13]) * 1000
  Int java_heap_mb = vm_memory_mb - 1000

  # Disk: BAM size rounded up to whole GB plus 50 GB, unless overridden.
  Int disk_gb = select_first([disk_space_gb, ceil(size(bam, "GB")) + 50])
  String disk_type = if use_ssd then "SSD" else "HDD"

  # The sample name is the BAM filename without its ".bam" suffix.
  String sample_name = basename(bam, ".bam")
  String output_filename = "~{sample_name}.allelicCounts.tsv"

  command <<<
    set -e
    export GATK_LOCAL_JAR=~{default="/root/gatk.jar" gatk4_jar_override}

    gatk --java-options "-Xmx~{java_heap_mb}m" CollectAllelicCounts \
      -L ~{common_sites} \
      --input ~{bam} \
      --reference ~{ref_fasta} \
      --minimum-base-quality ~{default="20" minimum_base_quality} \
      --output ~{output_filename}
  >>>

  runtime {
    docker: gatk_docker
    memory: "~{vm_memory_mb} MB"
    disks: "local-disk ~{disk_gb} ~{disk_type}"
    cpu: select_first([cpu, 1])
    preemptible: select_first([preemptible_attempts, 5])
  }

  output {
    String entity_id = sample_name
    File allelic_counts = output_filename
  }
}

# Splits an interval list into shards of at most num_intervals_per_scatter intervals,
# each shard carrying a copy of the original header.
#
# Inputs:
#   interval_list              interval list to split
#   num_intervals_per_scatter  maximum number of interval records per shard
# Output:
#   scattered_interval_lists   files named "<basename>.scattered.<NN>.interval_list"
task ScatterIntervals {
  input {
    File interval_list
    Int num_intervals_per_scatter

    # Runtime parameters
    String gatk_docker
    Int? mem_gb
    Int? disk_space_gb
    Boolean use_ssd = false
    Int? cpu
    Int? preemptible_attempts
  }

  # Memory: 2 GB unless overridden (1 GB is counted as 1000 MB).
  Int machine_mem_mb = select_first([mem_gb, 2]) * 1000

  String base_filename = basename(interval_list, ".interval_list")

  command <<<
    set -e

    # Header lines start with '@'. The match is anchored to the start of the line so
    # that an interval record which merely contains '@' (e.g. in its name column) is
    # kept with the intervals instead of being moved into the header.
    grep '^@' ~{interval_list} > header.txt
    grep -v '^@' ~{interval_list} > all_intervals.txt
    # NOTE(review): split's default numeric suffix is presumably two digits, which would
    # cap the number of shards; confirm against the split shipped in the docker image.
    split -l ~{num_intervals_per_scatter} --numeric-suffixes all_intervals.txt ~{base_filename}.scattered.
    # Prepend the header to every shard produced by split.
    for i in ~{base_filename}.scattered.*; do cat header.txt "$i" > "$i.interval_list"; done
  >>>

  runtime {
    docker: "~{gatk_docker}"
    memory: machine_mem_mb + " MB"
    disks: "local-disk " + select_first([disk_space_gb, 40]) + if use_ssd then " SSD" else " HDD"
    cpu: select_first([cpu, 1])
    preemptible: select_first([preemptible_attempts, 5])
  }

  output {
    Array[File] scattered_interval_lists = glob("~{base_filename}.scattered.*.interval_list")
  }
}

# Untars the per-chunk call archives and runs GATK PostprocessGermlineCNVCalls for one
# sample, producing "<entity_id>.vcf.gz".
#
# Inputs:
#   entity_id           sample identifier used to name the output VCF
#   chunk_path_tars     gzipped tar archives, one per chunk; archive i is extracted
#                       into directory CHUNK_i
#   sample_index        index used to build the sample directory name "SAMPLE_<index>"
#   gatk4_jar_override  optional jar exported as GATK_LOCAL_JAR (default /root/gatk.jar)
# Output:
#   vcf  the VCF written by the tool
task PostprocessGermlineCNVCalls {
  input {
    String entity_id
    Array[File] chunk_path_tars
    String sample_index
    File? gatk4_jar_override

    # Runtime parameters
    String gatk_docker
    Int? mem_gb
    Int? disk_space_gb
    Boolean use_ssd = false
    Int? cpu
    Int? preemptible_attempts
  }

  # Memory: 7 GB unless overridden (1 GB is counted as 1000 MB); the JVM heap is
  # kept 1000 MB below the machine allocation.
  Int machine_mem_mb = select_first([mem_gb, 7]) * 1000
  Int command_mem_mb = machine_mem_mb - 1000

  String sample_directory = "SAMPLE_~{sample_index}"  #this is a hardcoded convention in gcnvkernel
  String vcf_filename = "~{entity_id}.vcf.gz"

  # Inside a version 1.0 `command <<< >>>` block only ~{...} is a WDL placeholder, so
  # bash ${...} expansions are written directly; no "$" string workaround is needed.
  command <<<
    set -e
    export GATK_LOCAL_JAR=~{default="/root/gatk.jar" gatk4_jar_override}

    #untar chunk_path_tars to CHUNK_0, CHUNK_1, etc. directories and build chunk_paths_command_line="--chunk-path CHUNK_0 ..."
    chunk_path_array=(~{sep=" " chunk_path_tars})
    chunk_paths_command_line=""
    for index in "${!chunk_path_array[@]}"; do
      chunk_path_tar="${chunk_path_array[$index]}"
      mkdir "CHUNK_$index"
      tar xzf "$chunk_path_tar" -C "CHUNK_$index"
      chunk_paths_command_line="$chunk_paths_command_line --chunk-path CHUNK_$index"
    done

    # $chunk_paths_command_line is intentionally unquoted: it must split into arguments.
    gatk --java-options "-Xmx~{command_mem_mb}m" PostprocessGermlineCNVCalls \
      $chunk_paths_command_line \
      --sample-directory ~{sample_directory} \
      --output ~{vcf_filename}
  >>>

  runtime {
    docker: "~{gatk_docker}"
    memory: machine_mem_mb + " MB"
    disks: "local-disk " + select_first([disk_space_gb, 40]) + if use_ssd then " SSD" else " HDD"
    cpu: select_first([cpu, 1])
    preemptible: select_first([preemptible_attempts, 5])
  }

  output {
    File vcf = vcf_filename
  }
}
